Importing data

countMatrix <- ReadDataFrameFromTsv(file.name.path="../data/refSEQ_countMatrix.txt")
## ../data/refSEQ_countMatrix.txt read from disk!
# head(countMatrix)

designMatrix <- ReadDataFrameFromTsv(file.name.path="../design/all_samples_short_names.tsv.csv")
## ../design/all_samples_short_names.tsv.csv read from disk!
# head(designMatrix)

rownames <- as.integer(rownames(countMatrix))

rownames <- rownames[order(rownames)]
rownames.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
                    filter.values=rownames, c("external_gene_name",
                                "mgi_symbol", "entrezgene"))

noNaCountMatrix <- attachGeneColumnToDf(mainDf=countMatrix,
                                genesMap=rownames.map,
                                rowNamesIdentifier="entrezgene",
                                mapFromIdentifier="entrezgene",
                                mapToIdentifier="external_gene_name")


filteredCountsProp <- filterLowCounts(counts.dataframe=noNaCountMatrix, 
                                    is.normalized=FALSE,
                                    design.dataframe=designMatrix,
                                    cond.col.name="gcondition",
                                    method.type="Proportion")
## features dimensions before normalization: 20730
## Filtering out low count features...
## 13580 features are to be kept for differential expression analysis with filtering method 3

Plot PCA of log unnormalized data

pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)

Normalizations

Upper Quartile Normalization

normPropCountsUqua <- NormalizeData(data.to.normalize=filteredCountsProp, 
                                    norm.type="uqua", 
                                    design.matrix=designMatrix)

pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)

Negative control genes

Estimating Negative Control Genes to normalize data

#### estimating neg controls

neg.ctrls <- ReadDataFrameFromTsv(file.name.path="../data/full_SD_Neg_Control_genes_BMC_genomics.csv", row.names.col=NULL)
head(neg.ctrls)
##           Ensembl.ID Mouse.Gene.2.1.ST.probeset.ID
## 1 ENSMUSG00000026544                      17229948
## 2 ENSMUSG00000034071                      17486529
## 3 ENSMUSG00000106547                      17455231
## 4 ENSMUSG00000041057                      17339476
## 5 ENSMUSG00000051788                      17401386
## 6 ENSMUSG00000045267                      17310835
##   Mouse.Genome.430.2.0.probeset.ID    MGI.Symbol                 logFC
## 1                       1453074_at        Dusp23 -1,41369277830705E-05
## 2                       1437705_at        Zfp551 -1,81377838872621E-05
## 3                       1456750_at B230303O12Rik  2,86545700491914E-05
## 4                       1428390_at         Wdr43  4,57864397187535E-05
## 5                       1431660_at 4930564D02Rik    7,335759288285E-05
## 6                       1450585_at      Tas2r119          -0,000114023
##        AveExpr             t      P.Value    adj.P.Val             B
## 1 5,9439547687 -0,0003193989 0,9997464192 0,9998325291 -6,6151570846
## 2 5,9352982951 -0,0002109388 0,9998325291 0,9998325291 -6,6151571134
## 3 4,4201319911  0,0005493522 0,9995638521 0,9997997374 -6,6151569846
## 4 7,9698547438  0,0010387414 0,9991753108 0,9995290429 -6,6151565952
## 5 4,6481138989  0,0010651912 0,9991543115 0,9995290429 -6,6151565673
## 6 4,0937064457 -0,0015836005 0,9987427306 0,9994135667 -6,6151558794
neg.ctrls <- neg.ctrls$MGI.Symbol

neg.map <- convertGenesViaBiomart(specie="mm10", filter="mgi_symbol",
                    filter.values=neg.ctrls, c("external_gene_name",
                                "mgi_symbol", "entrezgene"))

neg.map.nna <- neg.map[-which(is.na(neg.map$entrezgene)),]

# neg.map.nna <- neg.map.nna[which(neg.map.nna$entrezgene %in%  rownames(normPropCountsUqua)),]
# 
neg.ctrls.entrez <- neg.map.nna$entrezgene

ind.ctrls <- which(rownames(normPropCountsUqua) %in% neg.ctrls.entrez)
counts.neg.ctrls <- normPropCountsUqua[ind.ctrls,]
pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)

Upper Quartile + RUVs Normalization

library(RUVSeq)
#groups <- makeGroups(designMatrix$classic)[1,,drop=FALSE]
neg.ctrl.list <- as.character(neg.map.nna$entrezgene[which(neg.map.nna$entrezgene %in% rownames(normPropCountsUqua))])
groups <- makeGroups(paste0(designMatrix$genotype, designMatrix$classic))[c(1, 3),]
ruvedSExprData <- RUVs(as.matrix(round(normPropCountsUqua)), cIdx=neg.ctrl.list,
                       scIdx=groups, k=5)

normExprData <- ruvedSExprData$normalizedCounts

pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA+RUV-Norm")
## [1] TRUE
pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA+RUV-Norm")
## [1] TRUE
pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="classic", colorColname="gcondition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="UQUA+RUV-Norm")
## [1] TRUE
plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
pal <- RColorBrewer::brewer.pal(9, "Set1")
plotRLE(normExprData, outline=FALSE, col=pal[designMatrix$gcondition])

edgeR DE Analysis

### edgering

desMat <- cbind(designMatrix, ruvedSExprData$W)
colnames(desMat) <- c(colnames(designMatrix), colnames(ruvedSExprData$W))

cc <- c("WTSD5 - WTHC5", "WTRS2 - WTHC7")

rescList1 <- applyEdgeR(counts=normExprData, design.matrix=desMat,
                        factors.column="gcondition", 
                        weight.columns=c("W_1", "W_2", "W_3", "W_4"),
                        contrasts=cc, useIntercept=FALSE, p.threshold=1,
                        verbose=TRUE)

WTSD5 - WTHC5

PlotHistPvalPlot(de.results=rescList1[[1]], design.matrix=desMat, 
                show.plot.flag=TRUE, plotly.flag=TRUE, 
                prefix.plot=names(rescList1)[1])

SD loading positive controls

sd.pos.ctrls <- ReadDataFrameFromTsv(file.name.path="../data/SD_full_pos_control_genes_BMC_genomics.csv")

sd.pos.ctrls <- sd.pos.ctrls$MGI.Symbol

sd.pos.map <- convertGenesViaBiomart(specie="mm10", filter="mgi_symbol",
                    filter.values=sd.pos.ctrls, c("external_gene_name",
                                "mgi_symbol", "entrezgene"))

sd.pos.map.nna <- sd.pos.map[-which(is.na(sd.pos.map$entrezgene)),]

sd.pos.genes.entrez <- sd.pos.map.nna$entrezgene
## mapping ensembl gene id using biomart
res.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
                            filter.values=rownames(rescList1[[1]]), 
                            c("external_gene_name", "mgi_symbol", "entrezgene"))


res.o <- attachGeneColumnToDf(mainDf=rescList1[[1]],
                                genesMap=res.o.map,
                                rowNamesIdentifier="entrezgene",
                                mapFromIdentifier="entrezgene",
                                mapToIdentifier="external_gene_name")


PlotVolcanoPlot(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05,
                positive.ctrls.list=sd.pos.genes.entrez)
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
#                  show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05)

WTRS2 - WTHC7

PlotHistPvalPlot(de.results=rescList1[[2]], design.matrix=desMat, 
                show.plot.flag=TRUE, plotly.flag=TRUE, 
                prefix.plot=names(rescList1)[2])

RS loading positive controls

rs2.pos.ctrls <- ReadDataFrameFromTsv(file.name.path="../data/RS2_pos_control_genes_BMC_genomics.csv")

rs2.pos.ctrls <- rs2.pos.ctrls$MGI.Symbol

rs2.pos.map <- convertGenesViaBiomart(specie="mm10", filter="mgi_symbol",
                    filter.values=rs2.pos.ctrls, c("external_gene_name",
                                "mgi_symbol", "entrezgene"))

rs2.pos.map.nna <- rs2.pos.map[-which(is.na(rs2.pos.map$entrezgene)),]

rs2.pos.genes.entrez <- rs2.pos.map.nna$entrezgene
res.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
                            filter.values=rownames(rescList1[[2]]), 
                            c("external_gene_name", "mgi_symbol", "entrezgene"))


res.o <- attachGeneColumnToDf(mainDf=rescList1[[1]],
                                genesMap=res.o.map,
                                rowNamesIdentifier="entrezgene",
                                mapFromIdentifier="entrezgene",
                                mapToIdentifier="external_gene_name")


PlotVolcanoPlot(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[2], threshold=0.05, 
                positive.ctrls.list=rs2.pos.genes.entrez)
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
                 # show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[2], threshold=0.05)